function authorprofile = readauthor(path,type,data)
%READAUTHOR Build a per-document feature matrix for all author files in a folder.
%   authorprofile = READAUTHOR(path,type,data) processes every matching file
%   found directly in the folder PATH and stores one row per file.
%
%   type = 'frame' : files matching *.LTH are passed to readxmlinfo(file,data).
%   type = 'fw'    : files matching *.ppLTH are passed to readfwinfo(file,data).
%   type = 'ngram' : files matching *.ppLTH are passed to
%                    countNgrams(file,data{2},data{1}); the result is the
%                    cell {countMatrix, ngrams}, where ngrams is the map
%                    returned by the last countNgrams call (or data{2} if no
%                    file matched).
%   any other type : returns [].
%
%   If no file matches, the 'frame' and 'fw' modes return [] and the
%   'ngram' mode returns {[], data{2}}.
%
%   NOTE(review): every recognised mode first changes the current folder to
%   the placeholder directory 'path to authors\'. This side effect is kept
%   because a relative PATH may depend on it - confirm and replace the
%   placeholder with the real location.

% Always define the output so that an empty folder does not leave it unassigned.
authorprofile = [];

if strcmp(type,'frame')
    % Frame-based count on the LTH output files.
    cd 'path to authors\';
    names = listMatchingFiles(path,'*.LTH');
    for fidx = 1:numel(names)
        authorprofile(fidx,:) = readxmlinfo([path,'/',names{fidx}],data);
    end

elseif strcmp(type,'fw')
    % Function-word count on the original papers.
    cd 'path to authors\';
    names = listMatchingFiles(path,'*.ppLTH');
    for fidx = 1:numel(names)
        authorprofile(fidx,:) = readfwinfo([path,'/',names{fidx}],data);
    end

elseif strcmp(type,'ngram')
    % N-gram count on the original papers; the map is shared across files.
    ngrams = data{2};
    cd 'path to authors\';
    names = listMatchingFiles(path,'*.ppLTH');

    for fidx = 1:numel(names)
        [ngrams,ncount] = countNgrams([path,'/',names{fidx}],ngrams,data{1});
        diffsize = size(ncount,2) - size(authorprofile,2);
        if diffsize > 0
            % New n-grams were discovered: widen earlier rows with zeros.
            authorprofile = [authorprofile zeros(size(authorprofile,1),diffsize)];
        elseif diffsize < 0
            % Defensive: pad a shorter count vector so the row assignment fits.
            ncount(end+1:size(authorprofile,2)) = 0;
        end
        authorprofile(fidx,:) = ncount;
    end
    authorprofile = {authorprofile,ngrams};
end
end

function names = listMatchingFiles(folder,pattern)
% Return the names (without folder) of the files in FOLDER matching PATTERN.
% Uses dir instead of ls: ls returns a padded char matrix only on Windows and
% a single string elsewhere, while dir returns a struct array on every platform.
entries = dir([folder,'/',pattern]);
entries = entries(~[entries.isdir]);
names = {entries.name};
end

function [ngrams,ncount] = countNgrams(path,ngrams,n)
%COUNTNGRAMS Count the character n-grams of one file against a shared index map.
%   [ngrams,ncount] = COUNTNGRAMS(path,ngrams,n) reads the whole file PATH as
%   characters, lower-cases it, and slides a window of length N over it.
%
%   ngrams : map object used through get(key), put(key,value) and size().
%            It maps each n-gram to its column index; unseen n-grams are
%            appended with index size(ngrams)+1.
%            (presumably a java.util.HashMap - confirm against the caller)
%   ncount : row vector where ncount(k) is the number of occurrences in this
%            file of the n-gram with index k. It has at least 1000 columns.
%
%   Raises countNgrams:fileOpenFailed if the file cannot be opened.

fid = fopen(path);
if fid < 0
    error('countNgrams:fileOpenFailed','Could not open file: %s',path);
end
closeFile = onCleanup(@() fclose(fid)); % close the file even if fread fails
content = lower(fread(fid,'*char')');   % row vector of lower-cased characters

ncount = zeros(1,max(1000,size(ngrams)));
for first = 1:length(content)-n+1
    key = content(first:first+n-1);
    pos = ngrams.get(key);
    if isempty(pos)
        % Unseen n-gram: register it under the next free index.
        siz = size(ngrams)+1;
        ngrams.put(key,siz);
        ncount(siz) = 1;
    elseif length(ncount) >= pos
        ncount(pos) = ncount(pos)+1;
    else
        % Index lies beyond the current vector; the assignment grows it.
        ncount(pos) = 1;
    end
end
end

function [WC] = readfwinfo(path,FWords)
%READFWINFO Count occurrences of the given function words in one file.
%   WC = READFWINFO(path,FWords) reads the file PATH as whitespace-separated
%   tokens and returns a row vector WC where WC(j) is the number of tokens
%   equal to FWords(j), ignoring case. WC is indexed the same as FWords.
%   An empty FWords yields a 1-by-0 result.
%
%   Raises readfwinfo:fileOpenFailed if the file cannot be opened.

fid = fopen(path);
if fid < 0
    error('readfwinfo:fileOpenFailed','Could not open file: %s',path);
end
closeFile = onCleanup(@() fclose(fid)); % close the file even if textscan fails

tokens = textscan(fid,'%s');
tokens = tokens{1};

WC = zeros(1,length(FWords)); % preallocate instead of growing in the loop
for j = 1:length(FWords)
    WC(j) = sum(strcmpi(tokens,FWords(j)));
end
end